library load

library(tidyverse)
library(glue)
library(tidybayes)
library(cowplot)
library(scales)
library(GGally)
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
library(knitr)

theme_set(theme_cowplot())

load_existing_sim <- TRUE

I’m starting this prior check with the work I’ve already done in the sandbox. In those scripts I already got a sense of what weakly informative priors for my starting model might be. In those files, I only did a visual inspection of the prior predictive distribution, not neccesarily including any summary statistics. I also did not do any sort of checks based on the sample size of the small design I have.

model

Written down the model is:

\[ \begin{aligned} \mathrm{-likelihood-} \\ error_i &\sim (pMem_i)*VM(0, \kappa_i) + (1 - pMem_i)*Unif(-\pi,\pi) \\ \\ \mathrm{-param~transformation-} \\ \kappa_i &= sd2k(circ\_sd_i) \\ \\ \mathrm{-linear~model-} \\ circ\_sd_i &= exp(\alpha_{0,SUBJ[i]} + \alpha_{\Delta,SUBJ[i]} * postCond) ~~ \mathrm{-log~link~on~sd-} \\ pMem_i &= inv\_logit(\beta_{0,SUBJ[i]} + \beta_{\Delta,SUBJ[i]} * postCond) ~~ \mathrm{-logit~link~on~pMem-} \\ \\ \mathrm{-priors:~all~independent-}\\ \alpha_{0,SUBJ[...]} &\sim Normal(mu\_\alpha_0, sigma\_\alpha_0) \\ \alpha_{\Delta,SUBJ[...]} &\sim Normal(mu\_\alpha_{\Delta}, sigma\_\alpha_{\Delta}) \\ \beta_{0,SUBJ[...]} &\sim Normal(mu\_\beta_0, sigma\_\beta_0) \\ \beta_{\Delta,SUBJ[...]} &\sim Normal(mu\_\beta_{\Delta}, sigma\_\beta_{\Delta}) \\ \\ mu\_\alpha_0 &\sim Normal(4, 0.5)\\ sigma\_\alpha_0 &\sim Normal^+(0, 0.5) \\ mu\_\alpha_{\Delta} &\sim Normal(0, 0.5)\\ sigma\_\alpha_{\Delta} &\sim Normal^+(0, 0.5)\\ mu\_\beta_0 &\sim Normal(0, 1.5)\\ sigma\_\beta_0 &\sim Normal^+(0, 1.5)\\ mu\_\beta_{\Delta} &\sim Normal(0, 1)\\ sigma\_\beta_{\Delta} &\sim Normal^+(0, 1)\\ \end{aligned} \]


real obs load

I am considering the color stimulation data. samples sizes:

obs_data <- read_csv(glue('{params$model_dir_str}/data/stimulation_obvs.csv'))
## Parsed with column specification:
## cols(
##   subj = col_double(),
##   subj_index = col_double(),
##   stimulation = col_double(),
##   error = col_double()
## )
#summarize subj num, obs count, and obs condition split
(subj_summary <- obs_data %>%
  group_by(subj_index) %>%
  summarise(n_obs = n(), frac_stim = mean(stimulation)))
## # A tibble: 2 x 3
##   subj_index n_obs frac_stim
##        <dbl> <int>     <dbl>
## 1          1   252       0.5
## 2          2   252       0.5

2 subjs with 252 observations each, 126 per condition.

9/12 - I am thinking I should simulate varying effects using the actual samples sizes I have. I also think that this shouldnt matter (at least for a model like this). I’ll also try with a larger number of subjects.


Simulation

nsubj_sim <- 15
nobs_per_cond_sim <- 126

functions

source(glue("{params$common_dir_str}/simulation.R"))

run simulate

sim_datasets_fpath <- glue("{params$save_dir_str}/sim_datasets.rds")

found_existing_sim <- FALSE
if (file.exists(sim_datasets_fpath)){
  found_existing_sim <- TRUE
}


if (load_existing_sim == FALSE || found_existing_sim == FALSE){

  nsim_datasets <- 1200
  
  sim_priors <- tibble(
    sim_num = 1:nsim_datasets,
    alpha0_mu = rnorm(nsim_datasets, 4, 0.5),
    alpha0_sigma = abs(rnorm(nsim_datasets, 0, 0.5)),
    alphaD_mu =  rnorm(nsim_datasets, 0, 0.5),
    alphaD_sigma = abs(rnorm(nsim_datasets, 0, 0.5)),
    beta0_mu = rnorm(nsim_datasets, 0, 1.5),
    beta0_sigma = abs(rnorm(nsim_datasets, 0, 1.5)),
    betaD_mu = rnorm(nsim_datasets, 0, 1),
    betaD_sigma = abs(rnorm(nsim_datasets, 0, 1)),
    nsubj = nsubj_sim,
    nobs_per_cond = nobs_per_cond_sim
  )
  
  sim_datasets <- sim_priors %>%
    mutate(dataset = pmap(sim_priors, simulateData))
  
  saveRDS(sim_datasets, file = sim_datasets_fpath)

}else{
  
  sim_datasets <- readRDS(sim_datasets_fpath)
}

Check sim data

head(sim_datasets)
## # A tibble: 6 x 12
##   sim_num alpha0_mu alpha0_sigma alphaD_mu alphaD_sigma beta0_mu
##     <int>     <dbl>        <dbl>     <dbl>        <dbl>    <dbl>
## 1       1      4.34        0.759   -0.662         0.163   0.333 
## 2       2      4.47        0.588   -0.736         0.108   1.06  
## 3       3      3.80        1.17    -0.228         0.944   2.94  
## 4       4      3.58        0.132    0.605         0.512   0.766 
## 5       5      3.93        0.105   -0.0543        0.244   0.0556
## 6       6      4.44        0.181    0.198         0.208  -1.77  
## # … with 6 more variables: beta0_sigma <dbl>, betaD_mu <dbl>,
## #   betaD_sigma <dbl>, nsubj <dbl>, nobs_per_cond <dbl>, dataset <list>
head(sim_datasets$dataset[[1]])
## # A tibble: 6 x 7
##    subj subj_alpha0 subj_alphaD subj_beta0 subj_betaD nobs_per_condit…
##   <int>       <dbl>       <dbl>      <dbl>      <dbl>            <dbl>
## 1     1        4.59      -0.794      0.451      0.601              126
## 2     2        5.24      -0.448      0.359      1.41               126
## 3     3        4.07      -0.509      0.452      1.99               126
## 4     4        3.68      -0.678      0.351      0.991              126
## 5     5        4.46      -0.623      0.291      0.954              126
## 6     6        4.00      -0.544      0.371      1.02               126
## # … with 1 more variable: subj_obs <list>
head(sim_datasets$dataset[[1]]$subj_obs[[1]])
##   obs_radian obs_degree stimulation
## 1  -2.584711 -148.09301           0
## 2   1.768165  101.30840           0
## 3   1.803777  103.34883           0
## 4  -2.691468 -154.20977           0
## 5   2.991071  171.37576           0
## 6   1.330715   76.24434           0

Plot distributions and summary statistics

Ideas:

Definitely plot the priors on the intercept and slope means, transformed back into sd

plot the prior predicitive densities for subjects

Some other plot ideas:

shaded histogram plot, combining data across stimulation conditions - check if data looks too peaked or too flat

shaded histogram plot, one for each stimulation condition - same check ^

plot of density lines, one from each sim dataset, one with both conditions and one with them split out - this would help identify weird distribution more that shaded histogram perhaps

Correctness checks that prior samples are drawn correctly

sim_datasets %>%
  select(alpha0_mu, alpha0_sigma, alphaD_mu, alphaD_sigma) %>%
  ggpairs(progress = FALSE)

sim_datasets %>%
  select(beta0_mu, beta0_sigma, betaD_mu, betaD_sigma) %>%
  ggpairs(progress = FALSE)

Correctness check of likelihood simulation

param_set <- cross_df(list(pMem = seq(0, 1, 0.1), sd = seq(20, 50, 5)))

param_set %>%
  mutate(sims = map2(param_set$pMem, sd2k_vec(pracma::deg2rad(param_set$sd)), ~simulateData_likelihood(.x, .y, 1000))) %>%
  unnest(sims) %>% 
  ggplot(aes(x = obs_degree)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(rows = vars(pMem), cols = vars(sd), labeller = 'label_both', scales = 'free')

param_set <- cross_df(list(pMem = seq(0, 1, 0.1), sd = seq(55, 105, 5)))

param_set %>%
  mutate(sims = map2(param_set$pMem, sd2k_vec(pracma::deg2rad(param_set$sd)), ~simulateData_likelihood(.x, .y, 1000))) %>%
  unnest(sims) %>% 
  ggplot(aes(x = obs_degree)) + 
  geom_histogram(binwidth = 1)+
  facet_grid(rows = vars(pMem), cols = vars(sd), labeller = 'label_both')

Notes

no notes


Prior distribution on von-mises circSD, linear model parameters

(pre group mean, post group mean, post-pre mean change)

No accounting for subject variance here

##
## alpha0_mu 
##

## linked prior distribution
alpha0_mu_hist <- sim_datasets %>%
  ggplot(aes(x = alpha0_mu)) + 
  geom_histogram(binwidth = 0.05, fill = "#F16C66") +
  theme(legend.position = "none",
        axis.title = element_text(size = 12),
        plot.title = element_text(size = 12)) + 
  labs(x = "linked prior on group mean for circSD, pre condition",
       title = "circSD group mean prior, alpha0_mu") + 
  scale_x_continuous(limits = c(1,7))
  

##
## alpha0_mu + alphaD_mu = alpha1_mu 
##

## linked prior distribution
alpha1_mu_hist <- sim_datasets %>%
  transmute(alpha1_mu = alpha0_mu + alphaD_mu) %>%
  ggplot(aes(x = alpha1_mu)) + 
  geom_histogram(binwidth = 0.05, fill = "#685369") +
  theme(axis.title = element_text(size = 12),
        plot.title = element_text(size = 12)) +
  labs(x = "linked prior on group mean for circSD, post condition",
       title = "circSD group mean prior, alpha0_mu + alphaD_mu") + 
  scale_x_continuous(limits = c(1,7))


plot_grid(alpha0_mu_hist, alpha1_mu_hist, nrow = 1, align = "h")
## Warning: Removed 2 rows containing missing values (geom_bar).

## Warning: Removed 2 rows containing missing values (geom_bar).

##
## alpha0_mu 
##

## delinked prior alpha0_mu
alpha0_mu_delinked_stats <- sim_datasets %>%
  transmute(delinked = exp(alpha0_mu)) %>%
  summarise(less_than_5 = sum(delinked < 5)/n(), greater_than_120 = sum(delinked > 120)/n())

alpha0_mu_delinked_hist <- sim_datasets %>%
  transmute(delinked = exp(alpha0_mu)) %>%
  ggplot(aes(x = delinked)) +
  geom_histogram(binwidth = 2, fill = "#F16C66") + 
  geom_vline(xintercept = c(5, 120), linetype = "dashed") + 
  labs(x = "prior on group mean for circSD, pre condition",
       title = "circSD group mean prior, exp(alpha0_mu)",
       subtitle = glue("prob(circSD < 5) = {alpha0_mu_delinked_stats$less_than_5}\nprob(circSD > 120) = {alpha0_mu_delinked_stats$greater_than_120}")) + 
  theme(axis.title = element_text(size = 12),
        plot.title = element_text(size = 12)) 
  
##
## alpha0_mu + alphaD_mu = alpha1_mu 
##  
  
## delinked prior alpha0_mu + alphaD_mu
alpha1_mu_delinked_hist <- sim_datasets %>%
  transmute(delinked = exp(alpha0_mu + alphaD_mu)) %>%
  ggplot(aes(x = delinked)) + 
  geom_histogram(binwidth = 2, fill = "#685369") + 
  labs(x = "prior on group mean for circSD, post condition",
       title = "circSD group mean prior, exp(alpha0_mu + alphaD_mu)") + 
  theme(axis.title = element_text(size = 12),
        plot.title = element_text(size = 12))


plot_grid(alpha0_mu_delinked_hist, alpha1_mu_delinked_hist, nrow = 1, align = "h")

##
## exp(alpha0_mu + alphaD_mu) - exp(alpha0_mu) plot
##

alphaD_mu_delinked_stats <- sim_datasets %>%
  transmute(delinked = exp(alpha0_mu + alphaD_mu) - exp(alpha0_mu)) %>%
  summarise(less_than_n100 = sum(delinked < -100)/n(), greater_than_100 = sum(delinked > 100)/n())

alphaD_mu_delinked_hist <- sim_datasets %>%
  transmute(delinked = exp(alpha0_mu + alphaD_mu) - exp(alpha0_mu)) %>%
  ggplot(aes(x = delinked)) + 
  geom_histogram(binwidth = 2, fill = "#00AEB2") + 
  geom_vline(xintercept = c(-100, 100), linetype = "dashed") + 
  labs(x = "prior on group mean for delta-circSD, mean post minus mean pre",
       title = "delta-circSD group mean prior, exp(alpha0_mu + alphaD_mu) - exp(alpha0_mu)",
       subtitle = glue("prob(delta-circSD < -100) = {alphaD_mu_delinked_stats$less_than_n100}\nprob(delta-circSD > 100) = {alphaD_mu_delinked_stats$greater_than_100}")) + 
  theme(axis.title = element_text(size = 12),
        plot.title = element_text(size = 12)) + 
  scale_x_continuous(breaks= pretty_breaks(10))

alphaD_mu_delinked_hist

Notes

no notes


Prior predicitive distribution on circSD at the subject level

(based on simulated subjects from each dataset)

These plots indicate the effects of the priors on alpha0_mu, alpha0_sigma, alphaD_mu, alphaD_sigma.

Marginal prior on subjects pre circSD, post circSD, and effect size

(ignoring simulation sample sizes)

sim_datasets_unnest <- sim_datasets %>%
  unnest(dataset) %>%
  mutate(alpha0_mu_delinked = exp(alpha0_mu),
         alpha1_mu_delinked = exp(alpha0_mu + alphaD_mu),
         subj_pre_circSD = exp(subj_alpha0),
         subj_post_circSD = exp(subj_alphaD + subj_alpha0),
         subj_circSD_ES = subj_post_circSD - subj_pre_circSD) 
subj_pre_circSD_plot <- sim_datasets_unnest %>%
  mutate(subj_pre_circSD = if_else(subj_pre_circSD > 120, 120, subj_pre_circSD)) %>%
  group_by(sim_num) %>%
  sample_n(1) %>%
  ungroup() %>%
  ggplot(aes(x = subj_pre_circSD)) + 
  geom_histogram(binwidth = 5, fill = "red", alpha = 0.3) + 
  labs(x = "subj_pre_circSD [prior predictive distribution]",
       subtitle = glue("p(<5) = {sum(sim_datasets_unnest$subj_pre_circSD < 5)/length(sim_datasets_unnest$subj_pre_circSD)}\n p(>120) = {sum(sim_datasets_unnest$subj_pre_circSD > 120)/length(sim_datasets_unnest$subj_pre_circSD)}"))


subj_post_circSD_plot <- sim_datasets_unnest %>%
  mutate(subj_post_circSD = if_else(subj_post_circSD > 120, 120, subj_post_circSD)) %>%
  group_by(sim_num) %>%
  sample_n(1) %>%
  ungroup() %>%
  ggplot(aes(x = subj_post_circSD)) + 
  geom_histogram(binwidth = 5, fill = "red", alpha = 0.3) + 
  labs(x = "subj_post_circSD [prior predictive distribution]",
       subtitle = glue("p(<5) = {sum(sim_datasets_unnest$subj_post_circSD < 5)/length(sim_datasets_unnest$subj_post_circSD)}\n p(>120) = {sum(sim_datasets_unnest$subj_post_circSD > 120)/length(sim_datasets_unnest$subj_post_circSD)}"))


subj_circSD_ES_plot <- sim_datasets_unnest %>%
  mutate(subj_circSD_ES = if_else(subj_circSD_ES > 100, 100, subj_circSD_ES)) %>%
  mutate(subj_circSD_ES = if_else(subj_circSD_ES < -100, -100, subj_circSD_ES)) %>%
  group_by(sim_num) %>%
  sample_n(1) %>%
  ungroup() %>%
  ggplot(aes(x = subj_circSD_ES)) + 
  geom_histogram(binwidth = 5, fill = "red", alpha = 0.7) + 
  labs(x = "subj_circSD_ES [prior predictive distribution]",
       subtitle = glue("p(< -100) = {sum(sim_datasets_unnest$subj_circSD_ES < -100)/length(sim_datasets_unnest$subj_circSD_ES)}\n p(>100) = {sum(sim_datasets_unnest$subj_circSD_ES > 100)/length(sim_datasets_unnest$subj_circSD_ES)}"))

plot_grid(subj_pre_circSD_plot, subj_post_circSD_plot, subj_circSD_ES_plot, ncol = 1)

Calculate min and max subject pre, post, and circSD effect size in each simulation.

sim_subj_extremes <- sim_datasets %>% 
  unnest(dataset) %>%
  mutate(subj_alpha1 = subj_alpha0 + subj_alphaD, 
         subj_alpha0_delinked = exp(subj_alpha0), 
         subj_alpha1_delinked = exp(subj_alpha1), 
         subj_circSD_effect = subj_alpha1_delinked - subj_alpha0_delinked ) %>%
  group_by(sim_num) %>%
  summarize_at(vars(subj_alpha0_delinked, subj_alpha1_delinked, subj_circSD_effect), list(max = max, min = min)) 

Across-sim SD of circSD

simSD <- sim_datasets %>% 
  unnest(dataset) %>%
  mutate(subj_pre_circSD = exp(subj_alpha0),
         subj_post_circSD = exp(subj_alpha0 + subj_alphaD)) %>%
  group_by(sim_num) %>%
  summarise_at(vars(subj_pre_circSD, subj_post_circSD), list(mean = mean, sd = sd))
plot_grid(
  
  ggplot(simSD, aes(subj_pre_circSD_sd)) + 
    geom_histogram(binwidth = 2) + 
    xlim(0, 200) + 
    labs(x = "pre condition: observed SD of distribution of circSD, per sim", 
         subtitle = glue::glue("p(>50) = {mean(simSD$subj_pre_circSD_sd > 50)}")),
  
  ggplot(simSD, aes(subj_post_circSD_sd)) + 
    geom_histogram(binwidth = 2) +
    xlim(0, 200) + 
    labs(x = "post condition: observed SD of distribution of circSD, per sim",
        subtitle = glue::glue("p(>50) = {mean(simSD$subj_post_circSD_sd > 50)}")),
  
  ncol = 1
  )                                                             
## Warning: Removed 14 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
## Warning: Removed 68 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

plot_grid(
  
  ggplot(simSD, aes(subj_pre_circSD_mean, subj_pre_circSD_sd)) + 
    geom_point() + 
    xlim(0, 200) + ylim(0, 100) + 
    labs(x = "pre condition: observed mean of distribution of circSD, per sim",
         y = "pre condition: observed sd"),
  
  ggplot(simSD, aes(subj_post_circSD_mean, subj_post_circSD_sd)) + 
    geom_point() +
    xlim(0, 200) + ylim(0, 100) + 
    labs(x = "post condition: observed mean of distribution of circSD, per sim",
         y = "post condition: observed sd"),
  
  ncol = 1
  )                                                             
## Warning: Removed 71 rows containing missing values (geom_point).
## Warning: Removed 198 rows containing missing values (geom_point).

Across-sim max circSD

#What is the max subject pre condition value in each dataset
pre_plot <- sim_subj_extremes %>%
  mutate(subj_alpha0_delinked_max = if_else(subj_alpha0_delinked_max > 120, 120, subj_alpha0_delinked_max)) %>%
  ggplot() + 
  geom_histogram(aes(x = subj_alpha0_delinked_max, fill = "red", alpha = 0.3), binwidth = 5) +
  theme(legend.position = "none") + 
  labs(x = "pre condition: max subj circSD per sim [subj_alpha0_delinked_max]",
       subtitle = glue("p(<5) = {sum(sim_subj_extremes$subj_alpha0_delinked_max < 5)/length(sim_subj_extremes$subj_alpha0_delinked_max)}\n p(>120) = {sum(sim_subj_extremes$subj_alpha0_delinked_max > 120)/length(sim_subj_extremes$subj_alpha0_delinked_max)}"))

#What is the max subject post condition value in each dataset
post_plot <- sim_subj_extremes %>%
  mutate(subj_alpha1_delinked_max = if_else(subj_alpha1_delinked_max > 120, 120, subj_alpha1_delinked_max)) %>%
  ggplot() + 
  geom_histogram(aes(x = subj_alpha1_delinked_max, fill = "red", alpha = 0.3), binwidth = 5) + 
  theme(legend.position = "none") + 
  labs(x = "post condition: max subj circSD per sim [subj_alpha1_delinked_max]",
       subtitle = glue("p(<5) = {sum(sim_subj_extremes$subj_alpha1_delinked_max < 5)/length(sim_subj_extremes$subj_alpha1_delinked_max)}\n p(>120) = {sum(sim_subj_extremes$subj_alpha1_delinked_max > 120)/length(sim_subj_extremes$subj_alpha1_delinked_max)}"))


plot_grid(pre_plot, post_plot, ncol =1, align = 'v')

Across-sim min circSD

pre_plot <- sim_subj_extremes %>%
  mutate(subj_alpha0_delinked_min = if_else(subj_alpha0_delinked_min > 120, 120, subj_alpha0_delinked_min)) %>%
  ggplot() + 
  geom_histogram(aes(x = subj_alpha0_delinked_min, fill = "red", alpha = 0.3), binwidth = 5) +
  theme(legend.position = "none") + 
  labs(x = "pre condition: min subj circSD per sim [subj_alpha0_delinked_min]",
       subtitle = glue("p(<5) = {sum(sim_subj_extremes$subj_alpha0_delinked_min < 5)/length(sim_subj_extremes$subj_alpha0_delinked_min)}\n p(>120) = {sum(sim_subj_extremes$subj_alpha0_delinked_min > 120)/length(sim_subj_extremes$subj_alpha0_delinked_min)}"))

#What is the most extreme subject post condition value in each dataset
post_plot <- sim_subj_extremes %>%
  mutate(subj_alpha1_delinked_min = if_else(subj_alpha1_delinked_min > 120, 120, subj_alpha1_delinked_min)) %>%
  ggplot() + 
  geom_histogram(aes(x = subj_alpha1_delinked_min, fill = "red", alpha = 0.3), binwidth = 5) + 
  theme(legend.position = "none") + 
  labs(x = "post condition: min subj circSD per sim [subj_alpha1_delinked_min]",
       subtitle = glue("p(<5) = {sum(sim_subj_extremes$subj_alpha1_delinked_min < 5)/length(sim_subj_extremes$subj_alpha1_delinked_min)}\n p(>120) = {sum(sim_subj_extremes$subj_alpha1_delinked_min > 120)/length(sim_subj_extremes$subj_alpha1_delinked_min)}"))


plot_grid(pre_plot, post_plot, ncol =1, align = 'v')

Across-sim max+min circSD effect size

#What is the most extreme change from pre to post in a subject

min_plot <- sim_subj_extremes %>%
  mutate(subj_circSD_effect_min = if_else(subj_circSD_effect_min > 100 , 100, subj_circSD_effect_min)) %>%
  mutate(subj_circSD_effect_min = if_else(subj_circSD_effect_min < -100 , -100, subj_circSD_effect_min)) %>%
  ggplot() + 
  geom_histogram(aes(x = subj_circSD_effect_min, fill = "red", alpha = 0.3), binwidth = 5) +
  theme(legend.position = "none") + 
  labs(x = "min subj circSD effect per sim \n[min(subj_alpha1_delinked - subj_alpha0_delinked)]",
       subtitle = glue("p(<-100) = {sum(sim_subj_extremes$subj_circSD_effect_min < -100)/length(sim_subj_extremes$subj_circSD_effect_min)}\n p(>100) = {sum(sim_subj_extremes$subj_circSD_effect_min > 100)/length(sim_subj_extremes$subj_circSD_effect_min)}"))

#What is the most extreme subject post condition value in each dataset
max_plot <- sim_subj_extremes %>%
  mutate(subj_circSD_effect_max = if_else(subj_circSD_effect_max > 100 , 100, subj_circSD_effect_max)) %>%
  mutate(subj_circSD_effect_max = if_else(subj_circSD_effect_max < -100 , -100, subj_circSD_effect_max)) %>%
  ggplot() + 
  geom_histogram(aes(x = subj_circSD_effect_max, fill = "red", alpha = 0.3), binwidth = 5) +
  theme(legend.position = "none") + 
  labs(x = "max subj circSD effect per sim \n[max(subj_alpha1_delinked - subj_alpha0_delinked)]",
       subtitle = glue("p(<-100) = {sum(sim_subj_extremes$subj_circSD_effect_max < -100)/length(sim_subj_extremes$subj_circSD_effect_max)}\n p(>100) = {sum(sim_subj_extremes$subj_circSD_effect_max > 100)/length(sim_subj_extremes$subj_circSD_effect_max)}"))

plot_grid(min_plot, max_plot, ncol =1, align = 'v')

Notes

no notes


Prior distribution on mixture probability pMem, linear model parameters

(pre group mean, post group mean, post-pre mean change)

## linked prior distribution


plot_grid(
  
  sim_datasets %>%
  ggplot(aes(x = beta0_mu)) + 
  geom_histogram(binwidth = 0.05, fill = "#F16C66") +
  theme(legend.position = "none",
        axis.title = element_text(size = 12),
        plot.title = element_text(size = 12)) + 
  labs(x = "linked prior on group mean for pMem, pre condition",
       title = "pMem group mean prior, beta0_mu") + 
  scale_x_continuous(limits = c(-5,5))
  
  ,
  
  sim_datasets %>%
  transmute(beta1_mu = beta0_mu + betaD_mu) %>%
  ggplot(aes(x = beta1_mu)) + 
  geom_histogram(binwidth = 0.05, fill = "#685369") +
  theme(axis.title = element_text(size = 12),
        plot.title = element_text(size = 12)) +
  labs(x = "linked prior on group mean for pMem, post condition",
       title = "pMem group mean prior, beta0_mu + betaD_mu") + 
  scale_x_continuous(limits = c(-5,5))
  
  ,
  
  nrow = 1,
  align = "h"
)
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

##
## inv_logit(beta0_mu)
##

##
## inv_logit(beta0_mu + betaD_mu) = inv_logit(beta1_mu) 
##  

## delinked prior alpha0_mu
beta0_mu_delinked_stats <- sim_datasets %>%
  transmute(delinked = exp(beta0_mu)/(exp(beta0_mu) + 1)) %>%
  summarise(less_than_5 = sum(delinked < 0.05)/n(), greater_than_95 = sum(delinked > 0.95)/n())

plot_grid(
  
  sim_datasets %>%
  transmute(delinked = exp(beta0_mu)/(exp(beta0_mu) + 1)) %>%
  ggplot(aes(x = delinked)) +
  geom_histogram(binwidth = 0.03, fill = "#F16C66") + 
  geom_vline(xintercept = c(0.05, 0.95), linetype = "dashed") + 
  labs(x = "prior on group mean for pMem, pre condition",
       title = "pMem group mean prior, inv_logit(beta0_mu)",
       subtitle = glue("prob(pMem < 0.05) = {beta0_mu_delinked_stats$less_than_5}\nprob(pMem > 0.95) = {beta0_mu_delinked_stats$greater_than_95}")) + 
  theme(axis.title = element_text(size = 12),
        plot.title = element_text(size = 12))
  
  ,
  
  sim_datasets %>%
  transmute(delinked = exp(beta0_mu + betaD_mu)/(exp(beta0_mu + betaD_mu) + 1)) %>%
  ggplot(aes(x = delinked)) + 
  geom_histogram(binwidth = 0.03, fill = "#685369") + 
  labs(x = "prior on group mean for pMem, post condition",
       title = "pMem group mean prior, inv_logit(alpha0_mu + alphaD_mu)") + 
  theme(axis.title = element_text(size = 12),
        plot.title = element_text(size = 12))
  
  ,
  
  nrow = 1,
  align = "h"
)

##
## inv_logit(beta0_mu + betaD_mu) - inv_logit(beta0_mu) plot
##

betaD_mu_delinked_stats <- sim_datasets %>%
  transmute(delinked = exp(beta0_mu + betaD_mu)/(exp(beta0_mu + betaD_mu) + 1) - exp(beta0_mu)/(exp(beta0_mu) + 1)) %>%
  summarise(less_than_n80 = sum(delinked < -0.8)/n(), greater_than_80 = sum(delinked > 0.8)/n())

sim_datasets %>%
  transmute(delinked = exp(beta0_mu + betaD_mu)/(exp(beta0_mu + betaD_mu) + 1) - exp(beta0_mu)/(exp(beta0_mu) + 1)) %>%
  ggplot(aes(x = delinked)) + 
  geom_histogram(binwidth = 0.02, fill = "#00AEB2") + 
  geom_vline(xintercept = c(-0.8, 0.8), linetype = "dashed") + 
  labs(x = "prior on group mean for delta-pMem, mean post minus mean pre",
       title = "delta-pMem group mean prior, inv_logit(beta0_mu + betaD_mu) - inv_logit(beta0_mu)",
       subtitle = glue("prob(delta-pMem < -0.8) = {betaD_mu_delinked_stats$less_than_n80}\nprob(delta-pMem > 0.8) = {betaD_mu_delinked_stats$greater_than_80}")) + 
  theme(axis.title = element_text(size = 12),
        plot.title = element_text(size = 12)) + 
  scale_x_continuous(breaks= pretty_breaks(10))

Notes

no notes


Prior predicitive distribution of pMem at the subject level

(based on simulated subjects from each dataset)

Marginal prior on subjects pre pMem, post pMem, and effect size

(ignoring simulation sample sizes)

sim_datasets_unnest <- sim_datasets %>%
  unnest(dataset) %>%
  mutate(beta0_mu_delinked = exp(beta0_mu)/(exp(beta0_mu) + 1),
         beta1_mu_delinked = exp(beta0_mu + betaD_mu)/(exp(beta0_mu + betaD_mu) + 1),
         subj_pre_pMem = exp(subj_beta0)/(exp(subj_beta0) + 1),
         subj_post_pMem = exp(subj_betaD + subj_beta0)/(exp(subj_betaD + subj_beta0) + 1),
         subj_pMem_ES = subj_post_pMem - subj_pre_pMem) 
pMem_ES_quantiles <- quantile(sim_datasets_unnest$subj_pMem_ES, c(0.025, 0.975))

plot_grid(
  
  sim_datasets_unnest %>%
  group_by(sim_num) %>%
  sample_n(1) %>%
  ungroup() %>%
  ggplot(aes(x = subj_pre_pMem)) + 
  geom_histogram(binwidth = 0.01, fill = "red", alpha = 0.3) + 
  labs(x = "subj_pre_pMem [prior predictive distribution]",
       subtitle = glue("p(< 0.05) = {sum(sim_datasets_unnest$subj_pre_pMem < 0.05)/length(sim_datasets_unnest$subj_pre_pMem)}\n p(> 0.95) = {sum(sim_datasets_unnest$subj_pre_pMem > 0.95)/length(sim_datasets_unnest$subj_pre_pMem)}"))
  
  ,
  
  sim_datasets_unnest %>%
  group_by(sim_num) %>%
  sample_n(1) %>%
  ungroup() %>%  
  ggplot(aes(x = subj_post_pMem)) + 
  geom_histogram(binwidth = 0.01, fill = "red", alpha = 0.3) + 
  labs(x = "subj_post_pMem [prior predictive distribution]",
       subtitle = glue("p(< 0.05) = {sum(sim_datasets_unnest$subj_post_pMem < 0.05)/length(sim_datasets_unnest$subj_post_pMem)}\n p(> 0.95) = {sum(sim_datasets_unnest$subj_post_pMem > 0.95)/length(sim_datasets_unnest$subj_post_pMem)}"))
  
  ,
  
  sim_datasets_unnest %>%
  group_by(sim_num) %>%
  sample_n(1) %>%
  ungroup() %>%  
  ggplot(aes(x = subj_pMem_ES)) + 
  geom_histogram(binwidth = 0.01, fill = "red", alpha = 0.7) + 
  geom_vline(xintercept = pMem_ES_quantiles, linetype = "dashed") + 
  labs(x = "subj_pMem_ES [prior predictive distribution], (5%, 95%) quantile lines",
       subtitle = glue("p(< -0.8) = {sum(sim_datasets_unnest$subj_pMem_ES < -0.8)/length(sim_datasets_unnest$subj_pMem_ES)}\n p(> 0.8) = {sum(sim_datasets_unnest$subj_pMem_ES > 0.80)/length(sim_datasets_unnest$subj_pMem_ES)}"))
  
  ,
  
  ncol = 1
)

Calculate min and max subject pre, post, and pMem effect size in each simulation.

sim_subj_extremes <- sim_datasets %>% 
  unnest(dataset) %>%
  mutate(subj_beta1 = subj_beta0 + subj_betaD, 
         subj_beta0_delinked = exp(subj_beta0)/(exp(subj_beta0) + 1), 
         subj_beta1_delinked = exp(subj_beta0 + subj_betaD)/(exp(subj_beta0 + subj_betaD) + 1), 
         subj_pMem_effect = subj_beta1_delinked - subj_beta0_delinked ) %>%
  group_by(sim_num) %>%
  summarize_at(vars(subj_beta0_delinked, subj_beta1_delinked, subj_pMem_effect), list(max = max, min = min)) 

Across-sim max pMem

plot_grid(

  #What is the max subject pre condition value in each dataset
  sim_subj_extremes %>%
  ggplot() + 
  geom_histogram(aes(x = subj_beta0_delinked_max, fill = "red", alpha = 0.3), binwidth = 0.02) +
  theme(legend.position = "none") + 
  labs(x = "pre condition: max subj pMem per sim [subj_beta0_delinked_max]",
       subtitle = glue("p(< 0.05) = {sum(sim_subj_extremes$subj_beta0_delinked_max < 0.05)/length(sim_subj_extremes$subj_beta0_delinked_max)}\n p(> 0.95) = {sum(sim_subj_extremes$subj_beta0_delinked_max > 0.95)/length(sim_subj_extremes$subj_beta0_delinked_max)}"))
  
  ,

  #What is the max subject post condition value in each dataset
  sim_subj_extremes %>%
  ggplot() + 
  geom_histogram(aes(x = subj_beta1_delinked_max, fill = "red", alpha = 0.3), binwidth = 0.02) + 
  theme(legend.position = "none") + 
  labs(x = "post condition: max subj pMem per sim [subj_beta1_delinked_max]",
       subtitle = glue("p(< 0.05) = {sum(sim_subj_extremes$subj_beta1_delinked_max < 0.05)/length(sim_subj_extremes$subj_beta1_delinked_max)}\n p(>0.95) = {sum(sim_subj_extremes$subj_beta1_delinked_max > 0.95)/length(sim_subj_extremes$subj_beta1_delinked_max)}"))

  ,
  ncol =1,
  align = 'v'

)

Across-sim min pMem

plot_grid(

  #What is the min subject pre condition value in each dataset
  sim_subj_extremes %>%
  ggplot() + 
  geom_histogram(aes(x = subj_beta0_delinked_min, fill = "red", alpha = 0.3), binwidth = 0.02) +
  theme(legend.position = "none") + 
  labs(x = "pre condition: min subj pMem per sim [subj_beta0_delinked_min]",
       subtitle = glue("p(< 0.05) = {sum(sim_subj_extremes$subj_beta0_delinked_min < 0.05)/length(sim_subj_extremes$subj_beta0_delinked_min)}\n p(> 0.95) = {sum(sim_subj_extremes$subj_beta0_delinked_min > 0.95)/length(sim_subj_extremes$subj_beta0_delinked_min)}"))
  
  ,

  #What is the min subject post condition value in each dataset
  sim_subj_extremes %>%
  ggplot() + 
  geom_histogram(aes(x = subj_beta1_delinked_min, fill = "red", alpha = 0.3), binwidth = 0.02) + 
  theme(legend.position = "none") + 
  labs(x = "post condition: min subj pMem per sim [subj_beta1_delinked_min]",
       subtitle = glue("p(< 0.05) = {sum(sim_subj_extremes$subj_beta1_delinked_min < 0.05)/length(sim_subj_extremes$subj_beta1_delinked_min)}\n p(>0.95) = {sum(sim_subj_extremes$subj_beta1_delinked_min > 0.95)/length(sim_subj_extremes$subj_beta1_delinked_min)}"))

  ,
  ncol =1,
  align = 'v'

)

Across-sim max+min pMem effect size

#What is the most extreme change from pre to post in a subject

min_plot <- sim_subj_extremes %>%
  mutate(subj_pMem_effect_min = if_else(subj_pMem_effect_min > 1 , 1, subj_pMem_effect_min)) %>%
  mutate(subj_pMem_effect_min = if_else(subj_pMem_effect_min < -1 , -1, subj_pMem_effect_min)) %>%
  ggplot() + 
  geom_histogram(aes(x = subj_pMem_effect_min, fill = "red", alpha = 0.3), binwidth = 0.03) +
  theme(legend.position = "none") + 
  labs(x = "min subj pMem effect per sim \n[min(subj_beta1_delinked - subj_beta0_delinked)]",
       subtitle = glue("p(<-0.8) = {sum(sim_subj_extremes$subj_pMem_effect_min < -0.8)/length(sim_subj_extremes$subj_pMem_effect_min)}\n p(>0.8) = {sum(sim_subj_extremes$subj_pMem_effect_min > 0.8)/length(sim_subj_extremes$subj_pMem_effect_min)}")) + 
  xlim(c(-1, 1))

#What is the most extreme subject post condition value in each dataset
max_plot <- sim_subj_extremes %>%
  mutate(subj_pMem_effect_max = if_else(subj_pMem_effect_max > 1 , 1, subj_pMem_effect_max)) %>%
  mutate(subj_pMem_effect_max = if_else(subj_pMem_effect_max < -1 , -1, subj_pMem_effect_max)) %>%
  ggplot() + 
  geom_histogram(aes(x = subj_pMem_effect_max, fill = "red", alpha = 0.3), binwidth = 0.03) +
  theme(legend.position = "none") + 
  labs(x = "max subj pMem effect per sim \n[max(subj_beta1_delinked - subj_beta0_delinked)]",
       subtitle = glue("p(<-0.8) = {sum(sim_subj_extremes$subj_pMem_effect_max < -0.8)/length(sim_subj_extremes$subj_pMem_effect_max)}\n p(>0.8) = {sum(sim_subj_extremes$subj_pMem_effect_max > 0.8)/length(sim_subj_extremes$subj_pMem_effect_max)}")) + 
    xlim(c(-1, 1))

plot_grid(min_plot, max_plot, ncol =1, align = 'v')
## Warning: Removed 2 rows containing missing values (geom_bar).

## Warning: Removed 2 rows containing missing values (geom_bar).

Notes

no notes


Joint prior on pre condition + effect size

joint_pre_plot <- sim_datasets_unnest %>% 
  group_by(sim_num) %>%
  sample_n(1) %>%
  ungroup() %>%
  ggplot(aes(y = exp(subj_alpha0), x = exp(subj_beta0)/(exp(subj_beta0) + 1))) +
    geom_point() +
    labs(x = 'pMem, pre', y = 'circSD, pre')

plot_grid(
  joint_pre_plot,
  ncol = 1,
  align = 'v'
)

joint_ES_plot <- sim_datasets_unnest %>% 
  group_by(sim_num) %>%
  sample_n(1) %>%
  ungroup() %>%
  mutate(subj_pMem_ES = exp(subj_beta0 + subj_betaD)/(exp(subj_beta0 + subj_betaD) + 1) - exp(subj_beta0)/(exp(subj_beta0) + 1),
         subj_circSD_ES = exp(subj_alpha0 + subj_alphaD) - exp(subj_alpha0)) %>%
  ggplot(aes(x = subj_pMem_ES, y = subj_circSD_ES)) +
    geom_point() +
    geom_vline(xintercept = 0, linetype = 'dashed') + 
    geom_hline(yintercept = 0, linetype = 'dashed')
  
plot_grid(
  joint_ES_plot,
  joint_ES_plot + ylim(c(-200, 200)) + labs(subtitle = "zoom in"),
  ncol = 1,
  align = 'v'
)
## Warning: Removed 44 rows containing missing values (geom_point).


Prior predicitive distribution at the observation level, per condition

(based on simulated obs from each dataset, ignoring subject labels)

sim_subj_obs_hist_count <- function(dataset, condition = 0){
  
  dataset_obs <- dataset %>% 
    unnest(subj_obs) %>%
    filter(stimulation == condition) %>%
    select(obs_degree)
  
  breaks <- seq(-180, 180, 5)
  
  bincount <- hist(dataset_obs$obs_degree, breaks = breaks, plot = FALSE)$counts
  
  bincount_names <- glue("c{breaks[-1]}")
  
  names(bincount) <- bincount_names
  bincount_df <- data.frame(as.list(bincount))

  return(bincount_df)
  
}

make_quantmat <- function(sim_datasets, condition = 0){

  bincounts <- sim_datasets %>% 
  select(dataset) %>% 
  mutate(subj_hist_counts = map(dataset, sim_subj_obs_hist_count, condition)) %>% 
  select(-dataset) %>% 
  unnest(subj_hist_counts) %>%
  as_tibble()


  xvals <- seq(-177.5, 177.5, 5)
  probs <- seq(0.1,0.9,0.1)

  quantmat <- as.data.frame(matrix(NA, nrow=ncol(bincounts), ncol=length(probs)))
  names(quantmat) <- paste0("p",probs)

  quantmat <- cbind(quantmat, xvals)

  for (iQuant in 1:length(probs)){
   quantmat[,paste0("p",probs[iQuant])] <- as.numeric(summarise_all(bincounts, ~quantile(., probs[iQuant])))
  }
    
  return(quantmat)
}

# calculate quantile mats from each condition
quantmat_cond0 <- make_quantmat(sim_datasets, 0)
quantmat_cond1 <- make_quantmat(sim_datasets, 1)
                      
  
c_light <- "#DCBCBC"
c_light_highlight <- "#C79999"
c_mid   <- "#B97C7C"
c_mid_highlight   <- "#A25050"
c_dark  <- "#8F2727"
c_dark_highlight  <- "#7C0000"


plot_grid(          
                                                                  
  ggplot(quantmat_cond0, aes(x = xvals)) + 
    geom_ribbon(aes(ymax = p0.9, ymin = p0.1), fill = c_light) + 
    geom_ribbon(aes(ymax = p0.8, ymin = p0.2), fill = c_light_highlight) + 
    geom_ribbon(aes(ymax = p0.7, ymin = p0.3), fill = c_mid) + 
    geom_ribbon(aes(ymax = p0.6, ymin = p0.4), fill = c_mid_highlight) + 
    geom_line(aes(y = p0.5), color = c_dark, size = 1) + 
    scale_x_continuous(breaks=pretty_breaks(10)) + 
    coord_cartesian(ylim = c(0, 150)) + 
    labs(x = "error (degrees)", y = "count +/- quantile", subtitle = "without stimulation")
  ,

  ggplot(quantmat_cond1, aes(x = xvals)) + 
    geom_ribbon(aes(ymax = p0.9, ymin = p0.1), fill = c_light) + 
    geom_ribbon(aes(ymax = p0.8, ymin = p0.2), fill = c_light_highlight) + 
    geom_ribbon(aes(ymax = p0.7, ymin = p0.3), fill = c_mid) + 
    geom_ribbon(aes(ymax = p0.6, ymin = p0.4), fill = c_mid_highlight) + 
    geom_line(aes(y = p0.5), color = c_dark, size = 1) + 
    scale_x_continuous(breaks=pretty_breaks(10)) +
    coord_cartesian(ylim = c(0, 150)) + 
    labs(x = "error (degrees)", y = "count +/- quantile", subtitle = "with stimulation")
  ,
  
  sim_datasets %>%
    sample_n(1) %>%
    unnest(dataset) %>%
    unnest(subj_obs) %>% 
    filter(stimulation == sample(c(0,1), 1)) %T>%
    print() %>%
    ggplot(aes(x = obs_degree)) + 
    geom_density() + 
    labs(subtitle = "correctness check: random simulation, random condition") + 
    scale_x_continuous(breaks=pretty_breaks(10))
  ,
  
  ncol = 1,
  align = "v"
)
## # A tibble: 1,890 x 20
##    sim_num alpha0_mu alpha0_sigma alphaD_mu alphaD_sigma beta0_mu
##      <int>     <dbl>        <dbl>     <dbl>        <dbl>    <dbl>
##  1     272      3.96        0.160     0.230        0.959    0.595
##  2     272      3.96        0.160     0.230        0.959    0.595
##  3     272      3.96        0.160     0.230        0.959    0.595
##  4     272      3.96        0.160     0.230        0.959    0.595
##  5     272      3.96        0.160     0.230        0.959    0.595
##  6     272      3.96        0.160     0.230        0.959    0.595
##  7     272      3.96        0.160     0.230        0.959    0.595
##  8     272      3.96        0.160     0.230        0.959    0.595
##  9     272      3.96        0.160     0.230        0.959    0.595
## 10     272      3.96        0.160     0.230        0.959    0.595
## # … with 1,880 more rows, and 14 more variables: beta0_sigma <dbl>,
## #   betaD_mu <dbl>, betaD_sigma <dbl>, nsubj <dbl>, nobs_per_cond <dbl>,
## #   subj <int>, subj_alpha0 <dbl>, subj_alphaD <dbl>, subj_beta0 <dbl>,
## #   subj_betaD <dbl>, nobs_per_condition <dbl>, obs_radian <dbl>,
## #   obs_degree <dbl>, stimulation <dbl>

Notes

no notes